A collection of the most useful Stata code snippets.
Partial credit for contributions goes to PhD candidate Sun Junqin (XJTU).
Notes
use *…
* import/export files (file names below are placeholders to be filled in)
* - set the working directory
cd path
* - import csv / Excel
insheet using .csv, clear
import excel .xlsx, firstrow clear // use the first row as variable names
* - export to Excel
export excel .xlsx // note: no long strings
export delimited using "xxx.csv" // export csv
* 变量处理
* Winsorize the main variables in one pass (loop over an irregular list of names)
local xx "asset revenue saleexpense"
foreach v of varlist `xx' {
    * original note: "winsorize by year" (no by-group is specified on this command)
    winsor `v', gen(`v'_x) p(0.01)
    * the raw variable is kept under the _y suffix ...
    rename `v' `v'_y
    * ... and the winsorized copy takes over the original name
    rename `v'_x `v'
}
* Winsorize a single variable
winsor var, p(0.01) gen(var_1)
* Append: loop over regularly numbered files
clear
* convert 1.xlsx ... 23.xlsx into 1.dta ... 23.dta, dropping the first two data rows of each
forvalues i = 1/23 {
    import excel `i'.xlsx, firstrow clear
    drop in 1/2
    save `i'.dta, replace
}
* stack files 11 through 17 into one dataset
use 11.dta
forvalues x = 12/17 {
    append using `x'.dta
}
*Check whether independent directors are interlocked
*For each value of mm from 1 to 1012, build `i'.dta from qq.dta.
forval i=1/1012 {
use qq.dta, clear
*keep only the observations of group `i'
keep if mm==`i'
*rename so these columns can be compared with qq.dta's own stkcd/indepname after the merge
rename stkcd id
rename indepname name
drop mm
*intermediate save of the group subset (overwritten at the end of the iteration)
save `i'.dta, replace
*join the subset back to the full file on key nn
merge 1:m nn using qq.dta
drop _merge
*keep rows where the subset's director name equals the name in the full file
keep if name==indepname
save `i'.dta, replace
}
* Change the data layout from wide to long; inc is the variable-name stub
reshape long inc, i(id) j(year)
* Show stored estimation results
* ereturn list accepts no result name: list all e() results, then display the one of interest
ereturn list
display e(r2_a)
* Export regression results (stored estimates m52 m53 m51) to a Word table, adding e(r2_a) as "Adj. R2"
outreg2 [m52 m53 m51 ] using tab011, addstat(Adj. R2, e(r2_a)) bdec(3) tdec(2) title(table1 full sample ) keep( sum1 CSTR pro inter cash DOMIN STATE size lev ) sortvar( CSTR pro inter) word replace tstat
* Export descriptive statistics (the tabstat variable list is left blank as a placeholder)
logout, save(mytable) word replace: tabstat , statistics( n MIN mean median MAX sd ) c(s) f(%10.4f)
* Spearman and Pearson (pwcorr) correlation coefficients
* The loops below build one table: entries below the diagonal come from the
* version==0 copy (raw values), entries above it from the version==1 copy
* (values replaced by their ranks).
eststo clear
sysuse auto, clear
local vlist "price mpg weight length"
* upper starts empty and grows by one variable per iteration; lower starts as the full list and shrinks
local upper
local lower `vlist'
* duplicate every observation; version flags the added copies
expand 2, gen(version)
* in the version==1 copy, overwrite each variable with its rank
foreach v of local vlist {
egen rank = rank(`v') if version == 1
replace `v' = rank if version ==1
drop rank
}
* one stored estimate (table column) per variable
foreach v of local vlist {
* correlations of `v' with itself and the remaining variables, raw values
estpost correlate `v' `lower' if version == 0
foreach m in b rho p count {
matrix `m' = e(`m')
}
* correlations of `v' with the variables already processed, on ranks; prepended to the matrices
if "`upper'"!="" {
estpost correlate `v' `upper' if version == 1
foreach m in b rho p count {
matrix `m' = e(`m'), `m'
}
}
* post the combined vector as e(b) and attach the other matrices to the estimate
ereturn post b
foreach m in rho p count {
quietly estadd matrix `m' = `m'
}
eststo `v', title(`v')
* move `v' from the lower list to the upper list
local lower: list lower - v
local upper `upper' `v'
}
* write the combined table to an RTF file
esttab using 20170220.rtf, replace rtf nonumbers mtitles noobs not b(a2)
* capture the output of pwcorr_a / spearman into Excel files (variable lists left blank as placeholders)
logout, save(mytable1) excel replace:pwcorr_a
logout, save(mytable2) excel replace:spearman ,star(0.01)
logout, save(mytable3) excel replace:spearman ,star(0.05)
logout, save(mytable4) excel replace:spearman ,star(0.1)
** Matching
* Pairs each area1==1 observation with the area1!=1 observation of the same
* year/indus/plate cell whose size is closest in relative terms.
use 10.dta,clear
drop if area1==. //match against the western region (original note)
* number the area1 observations within each year/indus/plate cell
bys year indus plate area1: g exp=_n if area1
* m = highest such number in the cell, i.e. how many area1 observations it holds
bys year indus plate: egen m=max(exp)
* cells without any area1 observation cannot be matched
drop if m==.
* replicate each non-area1 (year, Stkcd) cluster m times: one copy per area1 observation in the cell
expandcl m if area1!=1,cl(year Stkcd) gen(g)
* number the copies so that copy k lines up with area1 observation k
bys year Stkcd: replace exp=_n if area1!=1
* r = sum of size over area1 observations sharing this cell and exp value
bys year indus plate exp: egen r=sum(size*area1)
* q = relative absolute size difference of a candidate
g q=abs((size-r)/r) if area1!=1
* u = smallest difference among the candidates of that cell/exp
bys year indus plate exp: egen u=min(q) if !area1
* keep only the closest candidate(s)
drop if u!=q&!area1
egen mat=group(year indus plate exp) //assign a group id to each cell/exp combination
* drop groups consisting of a single observation
bysort mat:gen fre1=_N
drop if fre1==1
* remove the helper variables stored from exp through fre1 (dataset order)
drop exp-fre1
* Two-sample test of means
ttest wTobinQ, by(area1)
* Two-sample test of medians (rank-sum)
ranksum lev, by(SOE)
* 解决自变量集体定义标签
(1)将第二行的内容转置,并作为一个变量放入同一个文件数据中,起名为“varname”(Stata 区分大小写,须与下面代码中的 varname 一致);
(2)运行下面的代码:
* Label every variable with the text held in the matching observation of varname:
* the i-th variable (in dataset order) receives observation i of varname as its label.
* The loop runs over _all, so varname itself is also visited before it is dropped.
local i = 0
foreach v of varlist _all {
    local i = `i' + 1
    * read observation `i' of varname into a local macro
    local l = varname[`i']
    label variable `v' "`l'"
}
drop varname
* Number-to-text mapping (value label)
label define repair 1 "好" 2 "较好" 3 "中" 4 "较差" 5 "差"
rencode place, replace // turn text into numbers so that i.place can be used
* Convert numbers to text
tostring id, replace
* Convert text to numbers
destring id, replace
gen year=substr(time,1,4) // extract counting from the start of the string
gen id= substr(name,-7,6) // extract counting from the end of the string
gen mm=1 if strmatch( Profession ,"*教授*")
gen m333= real(regexr(m6,"分","")) // parse values formatted like "22.3分"
gen m=length(var1) // length of the string
*Record the result of a ttest
ttest mkv1,by(disclose)
ret list //inspect the returned results
* NOTE(review): the other logout calls in this file end with ": command"; this one has no command after the options — confirm what it is meant to capture
logout, save(mytable1) excel replace
* suest 检验
* combine the stored estimates m3 and m7, then test equality of the same coefficient across the two models
suest m3 m7
test [m3_mean]revenuechange_dummy_PCM = [m7_mean]revenuechange_dummy_PCM
* Install the latest version of a command
ssc install ranktest, replace // ranktest is the ado name
* Suitable for small samples (original note)
twoway (scatter intangible liability) (lfit intangible liability) // fitted line: lfit draws a straight line, qfit a curve
scatter intangible liability, mlabel(n) mlabpos(6) // scatter plot
*Loop that stores a returned result
*Writes the correlations of the pairs (x`i', x`j') with i<j into consecutive observations of new.
use "C:\Users\lenovo\Desktop\1.dta" ,clear
set obs 733441
gen new=.
*n = observation number; t = running counter, held as a variable that is constant across observations
gen n=_n
gen t=0
forval i=1/3 {
forval j=1/3{
corr x`i' x`j'
*advance the counter and write r(rho) into observation number t
replace t=t+1
replace new=r(rho) if n==t
*pairs with i>=j are not wanted: blank the slot and roll the counter back
replace new=. if `i'>=`j'&n==t
replace t=t-1 if `i'>=`j'
}
}
*Make a returned value directly usable inside a loop
sort id1
sum id1
return list
global ff=r(max) //store the maximum of id1 so it can be substituted in the next step (the key step)
*one regression per id1 value; intercept and slope are written back to that group's rows
*assumes the variables afa and beta already exist (replace does not create them) — TODO confirm
forval i=1/$ff {
qui reg earning returnwith1 if id1==`i'
replace afa=_b[_cons] if id1==`i'
replace beta=_b[returnwith1] if id1==`i'
}
esttab e1 f1 g1 h1 ,star(* 0.1 ** 0.05 *** 0.01) // show the stored estimates together on screen
* Cut a variable into equal-sized groups (quintiles here)
xtile income5q = income, nquantile(5)
* Split a string variable into columns at commas
split var1, parse(,) destring
* 下载文件
* copy a remote PDF to a local file, overwriting any existing copy
copy http://www.cninfo.com.cn/finalpage/2016-08-30/1202650633.PDF F:\stata/650633.pdf ,replace
*Single-variable DID
diff var, t(year) p( list)
* download one csv per distinct value of A; capture lets the loop continue when a download fails
* NOTE(review): confirm the remote endpoint is still available, and that "ignore=.cs" in the URL is intended
use test.dta, clear
levelsof A , clean local(stock)
foreach m in `stock' {
capture copy "http://ichart.finance.yahoo.com/table.csv?s=`m'&a=0&b=1&c=1990&d=11&e=31&f=2014&g=w&ignore=.cs" `m'.csv, replace
}
*Sorting: ascending id and year, descending comp
gsort id year -comp
areg BTD_new accurals i.year, absorb(id) //fixed effects
*Marginal effects after introducing the interaction term c.Ltrading#c.Llev
xi:areg rd_sale_w c.Ltrading#c.Llev Llev Ltrading Lmtb LLNTA Lprofit Ltangi Lrated Lindus_growth_w Lindus_RDS i.fyear ,absorb(gvkey) cluster(gvkey) robust
est store m2
margins, dydx(*) //all variables
* NOTE(review): margins' at() is usually written with a single "=" (at(Ltrading=1)) — confirm "==" is accepted
margins, dydx(Llev) at(Ltrading==1)
* Leave-one-out re-estimation: drop one observation at a time, re-run the regression,
* and keep the treatpost coefficient, its standard error and their ratio in `i'.dta.
clear
forvalues i = 1/1208 {
    use finalfinal.dta, clear
    drop in `i'
    reg ROA1 treatpost politicalnsoe post logasset wleverage BTM1 SALLGR1 tenure2 Acc1-Acc12 indus1-indus23, robust
    generate cof1 = _b[treatpost]
    generate cof2 = _se[treatpost]
    generate t = cof1/cof2
    * one row per run is enough: the three statistics are constant across rows
    keep in 1
    generate mmmm = `i'
    save `i'.dta, replace
}
* stack the 1208 single-row results and order them by the ratio
use 1.dta, clear
forvalues x = 2/1208 {
    append using `x'.dta
}
sort t
*Compute CAR (cumulative abnormal returns)
**download index *
clear
capture mkdir "D:/事件研究/"
* install the user-written commands cnindex / cntrade from SSC when they are not found
cap which cnindex
if _rc!=0{
ssc install cnindex
}
cap which cntrade
if _rc!=0{
ssc install cntrade
}
* NOTE(review): the original note below calls this the Shenzhen Component Index; confirm which index code 000300 actually refers to
cnindex 000300, path("D:/事件研究/") /*original note: the code starts with 00, so the Shenzhen Component Index is chosen*/
* keep the date and the index return, renamed rmt, and save it for merging with stock returns
keep date rit
rename rit rmt
sort date
save "D:/事件研究/index.dta",replace
*******Events
* Type in the event list: a numeric stock code and the event date as a 10-character string.
* stkcd is read as a number, so codes are entered without leading zeros.
clear
input stkcd str10 Eventdate
2 "2014-04-14"
600900 "2015-04-14"
28 "2016-04-14"
600000 "2016-04-14"
601898 "2016-04-14"
601988 "2016-04-14"
601666 "2016-04-14"
end
set matsize 500
* convert the string date into a numeric Stata date, then let it take over the name Eventdate
cap gen Eventdate1=date(Eventdate,"YMD")
drop Eventdate
rename Eventdate1 Eventdate
format Eventdate %dCY-N-D
sort stkcd Eventdate
*******Events Matrix
* N = number of events in memory
local N=_N
mkmat stkcd Eventdate, mat(event)
**save the event into a matrix called event
* open a results file with one row per event: event index, stock code, CAR1-CAR14
capture postclose event
postfile event id stkcd CAR1 CAR2 CAR3 CAR4 CAR5 CAR6 CAR7 CAR8 CAR9 CAR10 CAR11 CAR12 CAR13 CAR14 using "D:/事件研究/事件研究.dta",replace
forval i=1/`N' {
* event date (column 2) and stock code (column 1) of event `i'
local date = scalar(event[`i',2])
local stkcd = scalar(event[`i',1])
dis `stkcd'
*build rit (daily return of the individual stock)
clear
* cntrade is a user-written command; presumably it loads the stock's daily trading data — verify
cntrade `stkcd', path("D:/事件研究/")
keep stkcd date rit
destring rit, force replace
* divide by 100 (presumably percent to decimal — confirm against the cntrade output)
replace rit = rit/100
drop in 1
sort date
* old-style merge on date with the saved index returns; nokeep discards dates found only in the index file
merge date using "D:/事件研究/index.dta", nokeep
drop _m
order stkcd date rit rmt
* pre-event part: trading days before the event, numbered -1, -2, ... going back; keep the latest 200
preserve
keep if date<`date'
gsort -date
gen time=-_n
keep if time>=-200
save "D:/事件研究/pre_event.dta", replace
restore
* post-event part: event day is time 0; keep through time 10
keep if date>=`date'
sort date
gen time = _n-1
keep if time<=10
append using "D:/事件研究/pre_event.dta"
* regress stock return on index return over time<=-10 (at most -200..-10)
qui reg rit rmt if time<=-10
* abnormal return = regression residual, computed for time>=-3
predict abr if time>=-3, r
keep if time>=-3
sort time
* running sum of abnormal returns; 14 rows (time -3..10) when every day is present
gen CAR = sum(abr)
keep stkcd time CAR
* copy the 14 cumulative values into local macros for posting
local CAR1 = CAR[1]
local CAR2 = CAR[2]
local CAR3 = CAR[3]
local CAR4 = CAR[4]
local CAR5 = CAR[5]
local CAR6 = CAR[6]
local CAR7 = CAR[7]
local CAR8 = CAR[8]
local CAR9 = CAR[9]
local CAR10 = CAR[10]
local CAR11 = CAR[11]
local CAR12 = CAR[12]
local CAR13 = CAR[13]
local CAR14 = CAR[14]
* append one row for this event to the results file
post event (`i') (`stkcd') (`CAR1') (`CAR2') (`CAR3') (`CAR4') (`CAR5') (`CAR6') (`CAR7') (`CAR8') ///
(`CAR9') (`CAR10') (`CAR11') (`CAR12') (`CAR13') (`CAR14')
}
* close the results file and load it
postclose event
use "D:/事件研究/事件研究.dta", clear
* Split a string variable into columns at "/"
split v, parse(/) destring